# Scrapy settings for PeteParker project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
#     http://doc.scrapy.org/en/latest/topics/settings.html
#

# Project identity.
BOT_NAME = "PeteParker"

# Spider package: the same dotted path is used both for spider lookup and as
# the module for newly created spiders.
NEWSPIDER_MODULE = "PeteParker.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Item pipelines mapped to their order value.
ITEM_PIPELINES = {"PeteParker.pipelines.MainPipeline": 100}

# Downloader middleware overrides: one entry is mapped to None and the
# project's own redirect middleware is registered with order 100.
# NOTE(review): Scrapy's default middleware table appears to register
# `redirect.RedirectMiddleware` (and `MetaRefreshMiddleware`), not
# `BaseRedirectMiddleware` -- confirm that the None entry below really
# switches off the component it was meant to replace.
DOWNLOADER_MIDDLEWARES = {
    "scrapy.contrib.downloadermiddleware.redirect.BaseRedirectMiddleware": None,
    "PeteParker.CustomRedirectMiddleware.BaseRedirectMiddleware": 100,
}

# The project template suggests identifying the crawler (and its website) in
# the user-agent, for example:
#USER_AGENT = 'PeteParker (+http://www.yourdomain.com)'
# The value actually configured below is a mobile Chrome-on-Android browser
# string rather than a crawler-identifying one.
USER_AGENT = (
    "Mozilla/5.0 (Linux; Android 4.3; Nexus 4 Build/JWR66Y) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/30.0.1599.17 Safari/537.36"
)


# Concurrency caps for requests (global, per domain, per IP) and for items.
# NOTE(review): the Scrapy settings docs state that a non-zero
# CONCURRENT_REQUESTS_PER_IP takes precedence over the per-domain limit;
# both are 1 here -- confirm which of the two is meant to apply.
CONCURRENT_REQUESTS = 3
CONCURRENT_REQUESTS_PER_DOMAIN = 1
CONCURRENT_REQUESTS_PER_IP = 1
CONCURRENT_ITEMS = 5

# Cookie handling is switched on, together with its debug flag.
COOKIES_DEBUG = True
COOKIES_ENABLED = True

#DEFAULT_REQUEST_HEADERS = {}

# Crawl depth limit; the remaining depth settings are left at their defaults.
DEPTH_LIMIT = 3
#DEPTH_PRIORITY
#DEPTH_STATS
#DEPTH_STATS_VERBOSE

#DNSCACHE_ENABLED

# Downloader flags: stats and debug are both switched on.
DOWNLOADER_STATS = True
DOWNLOADER_DEBUG = True
#DOWNLOAD_DELAY
#DOWNLOAD_TIMEOUT
# NOTE(review): no DOWNLOAD_DELAY is assigned in the visible settings, and
# the Scrapy docs describe RANDOMIZE_DOWNLOAD_DELAY as having no effect
# when the delay is zero -- confirm whether a delay was intended.
RANDOMIZE_DOWNLOAD_DELAY = True

# Logging: enabled, with the log file name given as the relative path 'log'.
LOG_FILE = "log"
LOG_ENABLED = True
#LOG_ENCODING
#LOG_LEVEL
#LOG_STDOUT

# HTTP cache: enabled, with an expiration of one day expressed in seconds.
HTTPCACHE_ENABLED = True
HTTPCACHE_EXPIRATION_SECS = 24 * 60 * 60  # 86400
#HTTPCACHE_DIR
#HTTPCACHE_IGNORE_HTTP_CODES
#HTTPCACHE_IGNORE_MISSING
#HTTPCACHE_IGNORE_SCHEMES
# NOTE(review): this placeholder was a second HTTPCACHE_IGNORE_SCHEMES line;
# HTTPCACHE_STORAGE is presumably what was meant -- confirm.
#HTTPCACHE_STORAGE
#HTTPCACHE_DBM_MODULE
#HTTPCACHE_POLICY

# Referer flag.
REFERER_ENABLED = True

# Redirect handling: regular and meta-refresh redirects are both enabled,
# with the redirect count capped at 10.
REDIRECT_ENABLED = True
METAREFRESH_ENABLED = True
REDIRECT_MAX_TIMES = 10
#REDIRECT_MAX_METAREFRESH_DELAY
#REDIRECT_PRIORITY_ADJUST

# Retries: enabled, with RETRY_TIMES set to 3.
RETRY_ENABLED = True
RETRY_TIMES = 3
#RETRY_HTTP_CODES


# robots.txt obedience is switched off for this project.
ROBOTSTXT_OBEY = False

# Stats dumping is switched on; no stats-mail recipients are configured here.
STATS_DUMP = True
#STATSMAILER_RCPTS

#URLLENGTH_LIMIT
